#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed May  4 09:33:14 2022

@author: cythnia
"""

#————————————————————————————————————————————#
#招聘网站爬虫（以猎聘为例，爬取企业招聘信息）
#————————————————————————————————————————————#

#导入工具包
import time

import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Start page: the Liepin job-search page.
url = 'https://www.liepin.com/zhaopin/?inputFrom=head_navigation&scene=init&workYearCode=0&ckId=0gscakx8f8hslpeu1gdxj502r6lmdfy9'

# Request headers carrying a desktop Chrome User-Agent.
# NOTE(review): nothing in the visible code passes `headers` to a request;
# the browser session below does not use it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'
}

# Launch the Chrome driver and open the start page.
driver = webdriver.Chrome()
driver.get(url)

# Accumulates one row per scraped job posting.
lis = []

# Type the keyword into the search box and submit it with Enter.
# `find_element(By.…, …)` is used throughout because the
# `find_element_by_*` helpers were removed in Selenium 4.3.
sousuo = driver.find_element(
    By.CSS_SELECTOR,
    '#lp-search-bar-section > div > div > div > div > div > div:nth-child(1) > div.jsx-3599059289.search-input-container > div > div > div > input',
)
sousuo.send_keys('生信工程师')
sousuo.send_keys(Keys.ENTER)

# Industry filter: open the cascader, click the 10th entry of the first
# list, then the 1st entry of the second list. The fixed sleeps give each
# dropdown time to render before the next click.
driver.find_element(
    By.CSS_SELECTOR,
    '#filter-options-selector-bar > div > div:nth-child(2) > span > span.ant-cascader-picker-label',
).click()
time.sleep(2)
driver.find_element(By.XPATH, " /html/body/div[2]/div/div/div/ul/li[10]").click()
time.sleep(2)
driver.find_element(By.XPATH, " /html/body/div[2]/div/div/div/ul[2]/li[1]").click()
time.sleep(2)

# Company-type filter: open the select box and click the 2nd option of the
# virtual list that appears.
# NOTE(review): the option is located through `body > div:nth-child(22)`,
# which depends on the exact number of nodes under <body> — confirm it still
# matches if the page layout changes.
driver.find_element(By.CSS_SELECTOR, '#rc_select_1').click()
time.sleep(3)
driver.find_element(
    By.CSS_SELECTOR,
    'body > div:nth-child(22) > div > div > div > div.rc-virtual-list > div.rc-virtual-list-holder > div > div > div:nth-child(2) > div',
).click()
time.sleep(3)
#打开需要爬取的网站
# for urli in url:
#     driver.get(urli)
#     #等待5-8秒再进行操作
#     driver.implicitly_wait(np.random.randint(5,8))
#     #获取信息
#     #公司
#     gongsi=driver.find_elements_by_css_selector('body > div > div > section.content-left-section > div > ul > li > div > div > div.job-card-left-box > div > div > div > span')
#     #岗位名称
#     mingcheng=driver.find_elements_by_css_selector('body > div > div > section.content-left-section > div > ul > li > div > div > div.job-card-left-box > div > a > div.job-detail-header-box > div > div.ellipsis-1')
#     #地点
#     didian=driver.find_elements_by_css_selector('body > div > div > section.content-left-section > div > ul > li > div > div > div.job-card-left-box > div > a > div.job-detail-header-box > div > div.job-dq-box > span.ellipsis-1')
#     #薪资
#     xinzi=driver.find_elements_by_css_selector('body > div > div > section.content-left-section > div > ul > li > div > div > div.job-card-left-box > div > a > div.job-detail-header-box > span')
#     #经验要求
#     jingyan=driver.find_elements_by_css_selector('body > div > div > section.content-left-section > div > ul > li > div > div > div.job-card-left-box > div > a > div.job-labels-box > span:nth-child(1)')
#     #学历要求
#     xueli=driver.find_elements_by_css_selector('body > div > div > section.content-left-section > div > ul > li > div > div > div.job-card-left-box > div > a > div.job-labels-box > span:nth-child(2)')
    
#     print('已完成爬取第：'+urli+'页')
#     #汇总数据
#     for gongsis,mingchengs,didians,xinzis,jingyans,xuelis in zip(gongsi,mingcheng,didian,xinzi,jingyan,xueli):
#         gongsii=gongsis.text
#         mingchengi=mingchengs.text
#         didiani=didians.text
#         xinzii=xinzis.text
#         jingyani=jingyans.text
#         xuelii=xuelis.text
#         lis.append([gongsii,mingchengi,didiani,xinzii,jingyani,xuelii])
#     time.sleep(np.random.randint(5,8))
#     print('已汇总完成第：'+urli+'页')
# Crawl the postings whose cards show a company name.
# Upper bound on the number of result pages to walk through.
max_pages = 8
# CSS prefix shared by every per-field selector: the inner container of each
# job card in the results list.
card = 'body > div > div > section.content-left-section > div > ul > li > div > div > div.job-card-left-box > div'
# The "next page" button of the pagination bar.
next_button_selector = 'body > div > div > section.content-left-section > div > div > ul > li.ant-pagination-next > a > button'

try:
    for i in range(max_pages):
        # `find_elements(By.…, …)` replaces the `find_elements_by_*` helpers,
        # which were removed in Selenium 4.3.
        # Company name
        gongsi = driver.find_elements(By.CSS_SELECTOR, card + ' > a:nth-child(2) > div > div > span')
        # Job title
        mingcheng = driver.find_elements(By.CSS_SELECTOR, card + ' > a:nth-child(1) > div.job-detail-header-box > div > div.ellipsis-1')
        # Location
        didian = driver.find_elements(By.CSS_SELECTOR, card + ' > a:nth-child(1) > div.job-detail-header-box > div > div.job-dq-box > span.ellipsis-1')
        # Salary
        xinzi = driver.find_elements(By.CSS_SELECTOR, card + ' > a:nth-child(1) > div.job-detail-header-box > span')
        # Required experience
        jingyan = driver.find_elements(By.CSS_SELECTOR, card + ' > a:nth-child(1) > div.job-labels-box > span:nth-child(1)')
        # Required education
        xueli = driver.find_elements(By.CSS_SELECTOR, card + ' > a:nth-child(1) > div.job-labels-box > span:nth-child(2)')
        # Link to the posting (read from the anchor's href)
        lianjie = driver.find_elements(By.CSS_SELECTOR, card + ' > a:nth-child(1)')
        print('已完成爬取第：'+str(i+1)+'页')

        # Collect one row per posting.
        # NOTE(review): zip() stops at the shortest list, so if any selector
        # matches fewer elements than the others the columns can drift out of
        # alignment — confirm every card always contains all seven fields.
        for gongsis, mingchengs, didians, xinzis, jingyans, xuelis, lianjies in zip(
                gongsi, mingcheng, didian, xinzi, jingyan, xueli, lianjie):
            lis.append([
                gongsis.text,
                mingchengs.text,
                didians.text,
                xinzis.text,
                jingyans.text,
                xuelis.text,
                lianjies.get_attribute('href'),
            ])

        # Go to the next page unless this was the last one we want. A missing
        # or disabled button means there are no more results, so stop early
        # instead of crashing or re-reading the same page.
        has_next = i + 1 < max_pages
        if has_next:
            try:
                next_button = driver.find_element(By.CSS_SELECTOR, next_button_selector)
            except NoSuchElementException:
                has_next = False
            else:
                if next_button.is_enabled():
                    next_button.click()
                    # Random 3-4 second pause so the next page can load.
                    time.sleep(np.random.randint(3, 5))
                else:
                    has_next = False
        print('已汇总完成第：'+str(i+1)+'页')
        if not has_next:
            break
finally:
    # Always write out whatever was collected, even if the crawl was cut
    # short by an error, and always release the browser afterwards.
    try:
        result = pd.DataFrame(lis, columns=['公司名称','岗位名称','工作地点','薪资待遇','经验要求','学历要求','链接'])
        result.to_excel('/Users/cythnia/Desktop/猎聘生信工程师招聘信息.xlsx', index=False)
    finally:
        driver.quit()
